% Vasconcellos et al. (2016): ICCS 2016 conference paper published as a journal
% article in Procedia Computer Science, volume 80.
% The fields affiliation, targetfile and urlaccessdate are not standard BibTeX
% fields; unknown fields are silently ignored by standard styles.
% affiliation holds seven " and "-separated items (presumably one per author,
% in author order -- confirm against the repository record); each item is
% braced so embedded commas cannot be mistaken for separators.
@Article{VasconcellosClRoGaFeCaCo:2016:GPOpDa,
author = "Vasconcellos, Eduardo C. and Clua, Esteban W. G. and Rosa,
Reinaldo Roberto and Gazolla, Jo{\~a}o G. F. M. and Ferreira,
Nuno C{\'e}sar da R. and Carlquist, Victor and Costa, Carlos F.
da Silva",
affiliation = "{Universidade Federal Fluminense (UFF)} and {Universidade Federal
Fluminense (UFF)} and {Instituto Nacional de Pesquisas Espaciais
(INPE)} and {Universidade Federal Fluminense (UFF)} and {Instituto
Nacional de Pesquisas Espaciais (INPE)} and {Instituto Federal de
Educa{\c{c}}{\~a}o, Ci{\^e}ncia e Tecnologia de S{\~a}o Paulo}
and {University of Florida}",
title = "{GPU} optimization for data analysis of {Mario Schenberg} spherical
detector",
journal = "Procedia Computer Science",
year = "2016",
volume = "80",
pages = "2158--2168",
month = jun,
note = "International Conference on Computational Science 2016, ICCS 2016,
6--8 June 2016, San Diego, California, USA",
keywords = "GPU Computing, Many Matrix Operations, Gravitational Waves,
Astrophysical Events, Astrophysical Data Analysis",
abstract = "The Gravitational Wave (GW) detectors, advanced LIGO and advanced
Virgo, are acquiring the potential for recording unprecedented
astronomic data for astrophysical events. The Mario Schenberg
detector (MSD) is a smaller scale experiment that could
participate to this search. Previously, we developed a first data
analysis pipeline (DAP) to transform the detector's signal into
relevant GW information. This pipeline was extremely simplified in
order to be executed in low-latency. In order to improve the
analysis methods while keeping a low execution time, we propose
three different parallel approaches using GPU/CUDA. We implemented
the parallel models using cuBLAS library functions and enhance its
capability with asynchronous processes in CUDA streams. Our novel
model achieves performances that surpass the serial implementation
within the data analysis pipeline by a speed up of 21\% faster than
the traditional model. This first result is part of a more
comprehensive approach, in which all DAP modules that can be
parallelized, are being re-written in GPGP/CUDA, and then tested
and validated within the MSD context.",
doi = "10.1016/j.procs.2016.05.375",
url = "https://doi.org/10.1016/j.procs.2016.05.375",
issn = "1877-0509",
language = "en",
targetfile = "vasconcelos_gpu.pdf",
urlaccessdate = "28 abr. 2024"
}